# -*- coding: utf-8 -*-
import random
from scrapy import Request
from zc_core.spiders.base import BaseSpider
from abchina.rules import *


class CatalogSpider(BaseSpider):
    """Spider that collects root catalogs and, for each root, its sub-catalogs.

    The crawl starts with one request to ``root_catalog_url``; every root
    category found there triggers one request to ``sub_catalog_url``. Parsed
    category lists are emitted as ``Box('catalog', ...)`` items.

    NOTE(review): ``json``, ``Box``, ``parse_root_catalog`` and
    ``parse_sub_catalog`` are not imported explicitly in this file; they
    presumably come from ``from abchina.rules import *`` -- confirm.
    NOTE(review): ``self.batch_no`` and ``self.error_back`` are expected to be
    provided by ``BaseSpider`` -- confirm.
    """

    name = 'catalog'
    # Frequently used endpoints
    root_catalog_url = 'https://e.abchina.com/qyjc/site/GMallCatg/QueryPageList'
    sub_catalog_url = 'https://e.abchina.com/qyjc/site/GMallCatg/GetCatgByFirst'

    # Headers sent with every catalog request (shared by root and sub requests).
    _catalog_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
        'X-EncryptType': 'encrypt',
    }

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and forward ``batchNo`` to the base class."""
        super(CatalogSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # NOTE(review): page_size is not read anywhere in this class, and
        # start_requests sends a hard-coded PageSize of 99 -- confirm which
        # value is intended before wiring them together.
        self.page_size = 100

    def _catalog_request(self, url, callback, meta):
        """Build a GET request carrying the common catalog headers and options.

        :param url: endpoint to request.
        :param callback: spider method that handles the response.
        :param meta: request meta dict (includes the serialized ``params``).
        :return: a ``Request`` with ``priority=100`` and ``dont_filter=True``.
        """
        return Request(
            method='GET',
            url=url,
            callback=callback,
            # Copy so that no request can mutate the class-level template.
            headers=dict(self._catalog_headers),
            meta=meta,
            errback=self.error_back,
            priority=100,
            dont_filter=True,
        )

    def start_requests(self):
        """Yield the single request that fetches the root catalog list."""
        # The parameters travel in ``meta['params']`` rather than in the URL;
        # presumably a downstream component reads them -- TODO confirm.
        params = json.dumps({
            "CurPage": 1,
            "PageSize": 99,
            "catgId": 0,
            "hasSubject": 'false',
            "InterfaceName": "GMallCatg/QueryPageList"
        })
        yield self._catalog_request(
            url=self.root_catalog_url,
            callback=self.parse_root_catalog,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
                'params': params,
            },
        )

    def parse_root_catalog(self, response):
        """Emit the root categories and schedule one sub-catalog request each.

        Yields nothing when the parser returns an empty or ``None`` result.
        """
        meta = response.meta
        # Resolves to the module-level parser function, not this method.
        cat_list = parse_root_catalog(response)
        if not cat_list:
            # Guard first: shuffling a None result would raise TypeError.
            return

        # Randomise the order in which sub-catalog requests are issued; the
        # shuffle is in place, so the emitted Box carries the shuffled list.
        random.shuffle(cat_list)
        self.logger.info('主分类->(%s)', len(cat_list))
        yield Box('catalog', self.batch_no, cat_list)

        for cat in cat_list:
            # Crawl the sub-categories of this root category.
            catalog_id = cat.get('catalogId')
            params = json.dumps({"id": catalog_id, "InterfaceName": "GMallCatg/GetCatgByFirst"})
            yield self._catalog_request(
                url=self.sub_catalog_url,
                callback=self.parse_sub_catalog,
                meta={
                    'reqType': 'catalog',
                    'batchNo': meta.get('batchNo', self.batch_no),
                    'parentId': catalog_id,
                    'params': params,
                },
            )

    def parse_sub_catalog(self, response):
        """Emit the sub-categories parsed from a sub-catalog response."""
        meta = response.meta
        # Resolves to the module-level parser function, not this method.
        cat_list = parse_sub_catalog(response)
        if cat_list:
            self.logger.info('子分类[%s]->(%s)', meta.get('parentId'), len(cat_list))
            yield Box('catalog', self.batch_no, cat_list)
